# 02_forecast_regional_sales.py
import warnings
from pathlib import Path

import pandas as pd
from prophet import Prophet

# No category or module is given, so this ignores every warning issued through
# the `warnings` module from this point on, not only those raised by Prophet.
warnings.filterwarnings('ignore')

# -------------------------------
# LOAD MONTHLY REGIONAL DATA
# -------------------------------
# Read the processed CSV and convert its "Month" column to datetimes in one
# expression, so the min/max below and later date comparisons work on real
# timestamps rather than strings.
df = pd.read_csv(r"data/Processed/monthly_regional_sales.csv").assign(
    Month=lambda frame: pd.to_datetime(frame["Month"])
)

# Quick sanity summary of what was loaded
print("Loaded monthly regional sales")
print(f"Date range: {df['Month'].min()} to {df['Month'].max()}")
print(f"Regions: {df['Region'].unique()}")

# -------------------------------
# FORECAST EACH REGION SEPARATELY
# -------------------------------
# One independent Prophet model is fitted per region. For every region two
# frames with the columns ["ds", "Region", "Sales", "Type"] are collected:
#   * all_historical: the observed sales, tagged Type == "Actual"
#   * all_forecasts:  the predicted sales (yhat), tagged Type == "Forecast"
regions = df["Region"].unique()
forecast_periods = 6  # Forecast 6 months ahead
all_forecasts = []
all_historical = []

for region in regions:
    print(f"\n--- Processing {region} ---")

    # Filter data for this region and put it in chronological order so that
    # nothing downstream depends on the row order of the input file.
    region_data = df[df["Region"] == region].sort_values("Month")

    # Prepare data for Prophet (requires 'ds' and 'y' columns)
    prophet_data = region_data[["Month", "Sales"]].rename(
        columns={"Month": "ds", "Sales": "y"}
    )

    # Prophet raises ValueError when fewer than two non-missing observations
    # are available; skip such a region instead of aborting every other one.
    if prophet_data["y"].notna().sum() < 2:
        print(f"{region}: skipped, fewer than 2 non-missing sales observations")
        continue

    # Train Prophet model for this region
    model = Prophet(
        yearly_seasonality=True,
        weekly_seasonality=False,
        daily_seasonality=False,
        seasonality_mode='multiplicative'  # Better for sales data
    )
    model.fit(prophet_data)

    # Continue the date grid using the same convention as the history.
    # If every observed date is the 1st of its month, step by month start;
    # a month-end step there would put the first "future" date inside the
    # last observed month and end the horizon one month early. Any other
    # stamping keeps the original month-end step. Offset objects are used
    # instead of the "M" alias, which newer pandas versions deprecate.
    if prophet_data["ds"].dt.is_month_start.all():
        month_step = pd.offsets.MonthBegin()
    else:
        month_step = pd.offsets.MonthEnd()

    # Create future dataframe (history dates + forecast_periods new months)
    future = model.make_future_dataframe(periods=forecast_periods, freq=month_step)

    # Generate predictions
    forecast = model.predict(future)

    # Extract relevant forecast columns
    forecast_output = forecast[["ds", "yhat", "yhat_lower", "yhat_upper"]].copy()
    forecast_output["Region"] = region

    # Separate historical and future data
    last_historical_date = prophet_data["ds"].max()

    # Historical data (actuals): taken straight from the observed rows so each
    # sales value keeps its own date. Pairing them positionally with Prophet's
    # sorted, de-duplicated date grid misaligns unsorted input and fails on
    # duplicated dates.
    historical = prophet_data.rename(columns={"y": "Sales"})
    historical["Region"] = region
    historical["Type"] = "Actual"
    historical = historical[["ds", "Region", "Sales", "Type"]]

    # Future forecasts: only rows strictly after the last observed month
    future_forecast = forecast_output[forecast_output["ds"] > last_historical_date].copy()
    future_forecast.rename(columns={"yhat": "Sales"}, inplace=True)
    future_forecast["Type"] = "Forecast"
    future_forecast = future_forecast[["ds", "Region", "Sales", "Type"]]

    # Store results
    all_historical.append(historical)
    all_forecasts.append(future_forecast)

    print(f"{region}: {len(historical)} historical records, {len(future_forecast)} forecast records")

# -------------------------------
# COMBINE ALL REGIONS
# -------------------------------
# Stack the per-region frames into one actuals table and one forecast table
combined_historical = pd.concat(all_historical, ignore_index=True)
combined_forecasts = pd.concat(all_forecasts, ignore_index=True)

# Append the forecasts to the actuals, rename the date column to "Month" for
# Power BI, then order the rows by date and region with a fresh 0..n-1 index.
final_data = (
    pd.concat([combined_historical, combined_forecasts], ignore_index=True)
    .rename(columns={"ds": "Month"})
    .sort_values(["Month", "Region"])
    .reset_index(drop=True)
)

# Row counts overall and per record type
print("\n--- FINAL DATASET ---")
print(f"Total records: {len(final_data)}")
print(f"Actual records: {len(final_data[final_data['Type'] == 'Actual'])}")
print(f"Forecast records: {len(final_data[final_data['Type'] == 'Forecast'])}")

# -------------------------------
# SAVE FOR POWER BI
# -------------------------------
output_path = r"data/Predictions/regional_sales_with_forecast.csv"

# to_csv does not create missing parent folders and raises OSError instead,
# so make sure the destination directory exists before writing.
Path(output_path).parent.mkdir(parents=True, exist_ok=True)

# Write the combined actual + forecast table without the positional index
final_data.to_csv(output_path, index=False)

print(f"\nForecast saved to: {output_path}")
print("\nSample of final data:")
print(final_data.tail(15))